*WHAT'S ACROSS THE BORDER?
*JHA, NEUMARK, AND RODRIGUEZ-LOPEZ

/*
	This do file creates the master data for county pairs. It contains
	employment, number of establishments, payroll, and minimum wages from 1990 to 2016.
	I use the max_mw from VZ_state_annual.dta. 	
*/
	
include directories_build.do


* Load data 

use "`dlrdir'`s'dlrmscz_pairs_sic87xx_years.dta", clear

preserve
			use "`datadir'`s'cbp_county_state_20ind_sample.dta", replace
			compress
			merge m:1 cty_fips using "`dlrdir'`s'dlrmscz_counties.dta"
			keep if _merge==3
			drop _merge
			tempfile temp1
			save `temp1', replace
restore

merge m:1 cty_fips sic87xx year using "`temp1'"

keep if _merge==3

drop _merge


* Merge ranking data
merge m:1 sic87xx using "`cbpdir'`s'cbp_sic87xx_wagerank.dta"  


drop _merge

egen pair_sic87xx_year=group(countypair_id sic87xx year)

*REMOVE COMPARISON WITHIN STATES (DLR USE MW DATA FOR REGIONS WITHIN STATES -- VERY FEW)
gen cty_fips2=substr(countypair_id,7,5)
gen cty_fips1=substr(countypair_id,1,5)
destring cty_fips1 cty_fips2, replace
gen state1=int(cty_fips1/1000)
gen state2=int(cty_fips2/1000)
drop if state1==state2
replace state=int(cty_fips/1000)

tsset pair_sic87xx_year state

gen stcz=0
replace stcz=1 if L.state<state | L2.state<state | L3.state<state | L4.state<state | L5.state<state | L6.state<state | L7.state<state | L8.state<state | L9.state<state | L10.state<state | L11.state<state | L12.state<state | L13.state<state | L14.state<state | L15.state<state | L16.state<state | L17.state<state | L18.state<state | L19.state<state | L20.state<state | L21.state<state | L22.state<state | L23.state<state | L24.state<state | L25.state<state | L26.state<state | L27.state<state | L28.state<state | L29.state<state | L30.state<state | L31.state<state | L32.state<state | L33.state<state | L34.state<state | L35.state<state | L36.state<state | L37.state<state | L38.state<state | L39.state<state | L40.state<state | L41.state<state | L42.state<state | L43.state<state | L44.state<state | L45.state<state | L46.state<state | L47.state<state | L48.state<state | L49.state<state | L50.state<state | L51.state<state | L52.state<state | L53.state<state | L54.state<state | L55.state<state | L56.state<state | L57.state<state | L58.state<state | L59.state<state | L60.state<state
order year sic87xx countypair_id state stcz
tsset pair_sic87xx_year stcz


*USE THE MAXIMUM MINIMUM WAGE
gen mw=max_mw

foreach b in "cty_fips" "czone" "multi" "state" "emp" "ap" "qp1" "est" "mw" "totpop" "workagepop" "totemp" "totap" "totqp1" "totest" "wageap" "wageqp" "totwageap" "totwageqp" "totwageapm" "totwageqpm" {
gen `b'_0=L.`b'
gen `b'_1=`b'
}

keep if stcz==1


drop state-max_mw totemp-mw

label var sic87xx "20 industries"

foreach t in "0" "1" {
label var cty_fips_`t' "County FIPS code `t'"
label var state_`t' "State FIPS code `t'"
label var czone_`t' "AADHP commuting zones"
label var est_`t' "Number of establishments `t'"
label var emp_`t' "Employment `t'"
label var qp1_`t' "Nominal first quarter payroll `t'"
label var ap_`t' "Nominal annual payroll `t'"
label var mw_`t' "Minimum wage `t'"
label var totest_`t' "Number of establishments (czone-state)"
label var totemp_`t' "Employment (czone-state)"
label var totqp1_`t' "Nominal first quarter payroll (czone-state)"
label var totap_`t' "Nominal annual payroll (czone-state)"
label var wageap_`t' "Nominal hourly wage using AP (czone-state-ind)"
label var wageqp_`t' "Nominal hourly wage using QP (czone-state-ind)"
label var totwageap_`t' "Nominal hourly wage using AP (czone-state)"
label var totwageqp_`t' "Nominal hourly wage using QP (czone-state)"
label var totwageapm_`t' "Nominal hourly wage using AP MINUS (czone-state)"
label var totwageqpm_`t' "Nominal hourly wage using QP MINUS (czone-state)"
label var totpop_`t' "Total population czone-state"
label var workagepop_`t' "Total working age population czone-state"

} 


label var wagerank90 "Wage ranking of industry in 1990"

compress

save "`datadir'`s'cbp_countypairs_20ind_sample.dta", replace

********************* RESTAURANTS ******************************


*Keep only restaurant industry
keep if wagerank90==1

*Drop pairs within the same state and DC pairs
drop if state_0==11
drop if state_1==11
drop if state_0==state_1
gen check1=log(mw_1)-log(mw_0)
gen check2=log(emp_1)-log(emp_0)
drop if check1==. | check2==.
drop check1 check2

gen statea=state_0
gen stateb=state_1

*****MERGE DLR PAIR IDS SO THAT WE CAN USE PAIRTIMES==2 TO IDENTIFY DLR PAIRS
gen pair_id=countypair_id
merge m:1 pair_id using "`dlrdir'`s'co_pairs_dlr.dta"
drop if _merge==2
drop _merge
drop pair_id
***********************************************************************


*czoneid gives you the number of pairs
egen countypair=group(countypair_id)

reshape long cty_fips_ state_  est_ emp_ qp1_ ap_ mw_ totest_ totemp_ totqp1_ totap_ wageap_ wageqp_ totwageap_ totwageqp_ totwageapm_ totwageqpm_ totpop_ workagepop_, i(year countypair) j(st)
drop st
foreach d in "cty_fips" "state"  "est" "emp" "qp1" "ap" "mw" "totest" "totemp" "totqp1" "totap" "wageap" "wageqp" "totwageap" "totwageqp" "totwageapm" "totwageqpm" "totpop" "workagepop" {
ren `d'_ `d'
}

ren cty_fips county

save "`datadir'`s'cbp_stacked_countypair_sample.dta", replace

clear
